## Table J.3: Coefficient Estimates from Figure 5: -----------------------------

## Instructions ----------------------------------------------------------------
# Step 1: Adjust DATA_DIR to where Data is is located
# Step 2: Adjust OUT_DIR to where Output folder is
# Step 3: Run entire script


## setup -----------------------------------------------------------------------
# clean slate
rm(list = ls())
# Need to install to read/use PanelEstimate Objects
# devtools::install_github("insongkim/PanelMatch",ref = "big_data_checks")

## library
library(dplyr)
library(data.table)
library(PanelMatch)
library(kableExtra)
options(scipen=999)


# R Code to be run for PanelMatch Results. Ran on High-Performance Cluster.
# uncomment to run.


##### UNCOMMENT FROM HERE 
### SUBSETTING AND PATH
### 1. RUN ON SENATE ####
# naics_sub <- c("31", "32", "33") #### <- set industries to be analyzed
# chamber_sub <- c("senate, house") #### <- set chamber to be analyzed
# path <- "../data/don_lob_all_ind_20210203.rds" ### set path to be analyzes
# outcome_select <- "lob_any_bin"
# treatment_select <- "don_any_bin"
# 
# #### SET ANALYSIS PARAMETERS ######
# ## refinement_method <- "CBPS.weight" # set refinement
# refinement_method <- "mahalanobis" # set refinement
# size_match <- 10 # size of matched set
# lags_match_dv <- "1:3" # DV lags to match on
# covariates_match <- "l_sale + l_emp" # other covariates to match on
# exact_match <- c("govtrack_id") #
# treatments <- c("don_any_bin") # treatment variable
# treatname <- c("AnyDon") # treatment variable
# outcomes <- c("lob_any_bin")
# outcome_names <- c("AnyLob")
# chamber_names <- c("Senate", "House")
# chamber_sub <- c("senate", "house")
# 
# #### CREATE A DF WITH ANALYSIS PAREMTERS ######
# specs_df <- data.frame(chamber = rep(rep(c("senate", "house"), each = 4), 5),
#                        chamber_names = rep(rep(chamber_names, each = 4), 5),
#                        treatments = rep(treatments, each = 8),
#                        treatname = rep(treatname, each = 8),
#                        outcomes = rep(outcomes, 10),
#                        outcome_names = rep(outcome_names, 10),
#                        refinement_method = rep(refinement_method, 40),
#                        size_match = rep(size_match, 40),
#                        exact_match1 = rep(exact_match[1], 40),
#                        covariate_match = rep(covariates_match, 40),
#                        lags_match_dv = rep(lags_match_dv, 40),
#                        stringsAsFactors = FALSE) %>%
#   distinct()
# 
# specs_df <- specs_df %>% filter(chamber %in% chamber_sub & outcomes == outcome_select & treatments == treatment_select) # subset by chosen chamber
# 
# 
# 
# 
# # LOAD DATA, SUBSET, and keep only variables needed
# don_lob <- readRDS(file_in) %>%
#   filter(naics2 %in% naics_sub, chamber == chamber_sub) %>%
#   mutate(l_sale = log(sale+1), l_emp = log(emp+1)) %>%
#   select(gvkey_govtrack_id, gvkey, govtrack_id, year, naics, naics3, naics2, chamber, # IDs
#          lob_any_bin, lob_spon_bin, lob_cosp_only_bin, lob_comm_bin, # outcomes
#          don_any_bin, don_pac_bin, don_emp_bin, don_ceo_bin, don_gov_bin, # outcomes, reverse analysis
#          l_sale, l_emp) %>% # some recodings necessary
#   mutate(gvkey_govtrack_id_numeric = as.integer(as.factor(gvkey_govtrack_id)),
#          year = as.integer(year)) #
# 
# 
# # need to set as DF, if not PM breaks sometimes
# don_lob <- as.data.frame(don_lob)
# 
# # subset to smaller sample for testing
# # set.seed(1234)
# # gvkey_sub <- sample(unique(don_lob$gvkey), 100)
# # don_lob_sub <- don_lob %>% filter(gvkey %in% gvkey_sub)
# # i <- 1
# 
# # print number of rows
# print(paste0("using dataset with ", nrow(don_lob), " rows."))
# 
# 
# 
# # start loop across all outcomes
# for (i in 1:dim(specs_df)[1]) {
# 
#   # set up the respective outcome names for plot titles
#   outcome_name <- specs_df$outcome_names[i] # change with outcome used
#   chamber_name <- specs_df$chamber_names[i] #
#   treatment_name <- specs_df$treatname[i]
# 
# 
# 
#   covs_formula <- paste0("~ I(lag(", specs_df$outcomes[i], ", ", lags_match_dv, ")) + ", specs_df$covariate_match[i])
# 
# 
# 
#   ##### Run Panelmatch
#   Matches.lobby <- PanelMatch(lag = 3, lead = 0:3, time.id = "year", unit.id = "gvkey_govtrack_id_numeric",
#                               treatment = specs_df$treatments[i], match.missing = TRUE,
#                               outcome.var = specs_df$outcomes[i], qoi = "att",
#                               covs.formula = as.formula(covs_formula),
#                               exact.match.variables = c(specs_df$exact_match1[i], specs_df$exact_match2[i]), # uncommented for matching on both govtrack ID and 2-digit NAICS2
#                               ## exact.match.variables = c(specs_df$exact_match1[i]),
#                               forbid.treatment.reversal = FALSE,
#                               restrict.control.period = 3,
#                               size.match = specs_df$size_match[i],
#                               data = don_lob,
#                               refinement.method = specs_df$refinement_method[i])
# 
#   # Determine output name based on specficiation
#   output_name <- paste0("manuf_ann_", specs_df$chamber[i], "_maha_",
#                         specs_df$treatname[i], "_" , specs_df$outcome_names[i], "_jop_naics3")
# 
#   # Save PanelMatch Object
#   save(Matches.lobby, file = paste0("../output/MSets_", output_name))
#   print(paste0(i,". ", "saved **Matched Sets** object to ", "../output/MSets_", output_name))
#   gc()
# 
# 
#   # Run PanelEstimate
#   PM.lobby <- PanelEstimate(sets = Matches.lobby, data = don_lob, se.method = "bootstrap")
# 
#   # Save PanelEstimate Object
#   save(PM.lobby, file = paste0("../output/EstObj_", output_name))
#   print(paste0(i,". ", "saved **Panel Estimate** object to ", "../output/EstObj_", output_name))
#   gc()
# 
# 
# }
# 
# gc()
##### UNCOMMENT TO HERE 











### 3. Plot Results/Table ####
# number of manufacturing industries
# length(unique(don_lob$naics3))
# sort(table(don_lob$naics3))
# naics3_firms <- don_lob %>%
#   group_by(naics3) %>%
#   summarise(n = length(unique(gvkey)))
# mean(naics3_firms$n) # mean number of firms
# median(naics3_firms$n) # median number of firms


DATA_DIR <- "C:/Users/js.egb/Dropbox/campaign-lobby-paper/replication_package/data/fig5"
OUT_DIR <- "C:/Users/js.egb/Dropbox/campaign-lobby-paper/replication_package/output"    

# create table
library(kableExtra)
n_row <- 18
tab <- data.frame(Chamber = rep(NA, n_row),
                  `t+0` = rep(NA, n_row),
                  `t+1` = rep(NA, n_row),
                  `t+2` = rep(NA, n_row),
                  `t+3` = rep(NA, n_row))


#### Committee Members

# PAC results
load(file.path(DATA_DIR, "EstObj_manuf_ann_senate_maha_PacDon_CommLob"))

tab$Chamber[1] <- "\\textit{PAC}"
tab$t.0[1] <- round(PM.lobby$estimates[1], 3)
tab$t.0[2] <- paste0("(",round(PM.lobby$standard.error[1], 4), ")") 
tab$t.1[1] <- round(PM.lobby$estimates[2], 3)
tab$t.1[2] <- paste0("(",round(PM.lobby$standard.error[2], 4), ")") 
tab$t.2[1] <- round(PM.lobby$estimates[3], 3)
tab$t.2[2] <- paste0("(",round(PM.lobby$standard.error[3], 4), ")") 
tab$t.3[1] <- round(PM.lobby$estimates[4], 3)
tab$t.3[2] <- paste0("(",round(PM.lobby$standard.error[4], 4), ")") 


# CEO results
load(file.path(DATA_DIR, "EstObj_manuf_ann_senate_maha_CeoDon_CommLob"))

tab$Chamber[3] <- "\\textit{CEO}"
tab$t.0[3] <- round(PM.lobby$estimates[1], 3)
tab$t.0[4] <- paste0("(",round(PM.lobby$standard.error[1], 4), ")") 
tab$t.1[3] <- round(PM.lobby$estimates[2], 3)
tab$t.1[4] <- paste0("(",round(PM.lobby$standard.error[2], 4), ")") 
tab$t.2[3] <- round(PM.lobby$estimates[3], 3)
tab$t.2[4] <- paste0("(",round(PM.lobby$standard.error[3], 4), ")") 
tab$t.3[3] <- round(PM.lobby$estimates[4], 3)
tab$t.3[4] <- paste0("(",round(PM.lobby$standard.error[4], 4), ")")

# EMP results
load(file.path(DATA_DIR, "EstObj_manuf_ann_senate_maha_EmpDon_CommLob"))

tab$Chamber[5] <- "\\textit{EMP}"
tab$t.0[5] <- round(PM.lobby$estimates[1], 3)
tab$t.0[6] <- paste0("(",round(PM.lobby$standard.error[1], 4), ")") 
tab$t.1[5] <- round(PM.lobby$estimates[2], 3)
tab$t.1[6] <- paste0("(",round(PM.lobby$standard.error[2], 4), ")") 
tab$t.2[5] <- round(PM.lobby$estimates[3], 3)
tab$t.2[6] <- paste0("(",round(PM.lobby$standard.error[3], 4), ")") 
tab$t.3[5] <- round(PM.lobby$estimates[4], 3)
tab$t.3[6] <- paste0("(",round(PM.lobby$standard.error[4], 4), ")")


### Bill Co-Sponsorship
load(file.path(DATA_DIR, "EstObj_manuf_ann_senate_maha_PacDon_CoSpLob"))

tab$Chamber[1+6] <- "\\textit{PAC}"
tab$t.0[1+6] <- round(PM.lobby$estimates[1], 3)
tab$t.0[2+6] <- paste0("(",round(PM.lobby$standard.error[1], 4), ")") 
tab$t.1[1+6] <- round(PM.lobby$estimates[2], 3)
tab$t.1[2+6] <- paste0("(",round(PM.lobby$standard.error[2], 4), ")") 
tab$t.2[1+6] <- round(PM.lobby$estimates[3], 3)
tab$t.2[2+6] <- paste0("(",round(PM.lobby$standard.error[3], 4), ")") 
tab$t.3[1+6] <- round(PM.lobby$estimates[4], 3)
tab$t.3[2+6] <- paste0("(",round(PM.lobby$standard.error[4], 4), ")") 


# CEO results
load(file.path(DATA_DIR, "EstObj_manuf_ann_senate_maha_CeoDon_CoSpLob"))

tab$Chamber[3+6] <- "\\textit{CEO}"
tab$t.0[3+6] <- round(PM.lobby$estimates[1], 3)
tab$t.0[4+6] <- paste0("(",round(PM.lobby$standard.error[1], 4), ")") 
tab$t.1[3+6] <- round(PM.lobby$estimates[2], 3)
tab$t.1[4+6] <- paste0("(",round(PM.lobby$standard.error[2], 4), ")") 
tab$t.2[3+6] <- round(PM.lobby$estimates[3], 3)
tab$t.2[4+6] <- paste0("(",round(PM.lobby$standard.error[3], 4), ")") 
tab$t.3[3+6] <- round(PM.lobby$estimates[4], 3)
tab$t.3[4+6] <- paste0("(",round(PM.lobby$standard.error[4], 4), ")")

# EMP results
load(file.path(DATA_DIR, "EstObj_manuf_ann_senate_maha_EmpDon_CoSpLob"))

tab$Chamber[5+6] <- "\\textit{EMP}"
tab$t.0[5+6] <- round(PM.lobby$estimates[1], 3)
tab$t.0[6+6] <- paste0("(",round(PM.lobby$standard.error[1], 4), ")") 
tab$t.1[5+6] <- round(PM.lobby$estimates[2], 3)
tab$t.1[6+6] <- paste0("(",round(PM.lobby$standard.error[2], 4), ")") 
tab$t.2[5+6] <- round(PM.lobby$estimates[3], 3)
tab$t.2[6+6] <- paste0("(",round(PM.lobby$standard.error[3], 4), ")") 
tab$t.3[5+6] <- round(PM.lobby$estimates[4], 3)
tab$t.3[6+6] <- paste0("(",round(PM.lobby$standard.error[4], 4), ")")


#### Bill Sponsorship

# PAC results
load(file.path(DATA_DIR, "EstObj_manuf_ann_senate_maha_PacDon_SpLob"))

tab$Chamber[1+12] <- "\\textit{PAC}"
tab$t.0[1+12] <- round(PM.lobby$estimates[1], 3)
tab$t.0[2+12] <- paste0("(",round(PM.lobby$standard.error[1], 4), ")") 
tab$t.1[1+12] <- round(PM.lobby$estimates[2], 3)
tab$t.1[2+12] <- paste0("(",round(PM.lobby$standard.error[2], 4), ")") 
tab$t.2[1+12] <- round(PM.lobby$estimates[3], 3)
tab$t.2[2+12] <- paste0("(",round(PM.lobby$standard.error[3], 4), ")") 
tab$t.3[1+12] <- round(PM.lobby$estimates[4], 3)
tab$t.3[2+12] <- paste0("(",round(PM.lobby$standard.error[4], 4), ")") 


# CEO results
load(file.path(DATA_DIR, "EstObj_manuf_ann_senate_maha_CeoDon_SpLob"))

tab$Chamber[3+12] <- "\\textit{CEO}"
tab$t.0[3+12] <- round(PM.lobby$estimates[1], 3)
tab$t.0[4+12] <- paste0("(",round(PM.lobby$standard.error[1], 4), ")") 
tab$t.1[3+12] <- round(PM.lobby$estimates[2], 3)
tab$t.1[4+12] <- paste0("(",round(PM.lobby$standard.error[2], 4), ")") 
tab$t.2[3+12] <- round(PM.lobby$estimates[3], 3)
tab$t.2[4+12] <- paste0("(",round(PM.lobby$standard.error[3], 4), ")") 
tab$t.3[3+12] <- round(PM.lobby$estimates[4], 3)
tab$t.3[4+12] <- paste0("(",round(PM.lobby$standard.error[4], 4), ")")

# EMP results
load(file.path(DATA_DIR, "EstObj_manuf_ann_senate_maha_EmpDon_SpLob"))

tab$Chamber[5+12] <- "\\textit{EMP}"
tab$t.0[5+12] <- round(PM.lobby$estimates[1], 3)
tab$t.0[6+12] <- paste0("(",round(PM.lobby$standard.error[1], 4), ")") 
tab$t.1[5+12] <- round(PM.lobby$estimates[2], 3)
tab$t.1[6+12] <- paste0("(",round(PM.lobby$standard.error[2], 4), ")") 
tab$t.2[5+12] <- round(PM.lobby$estimates[3], 3)
tab$t.2[6+12] <- paste0("(",round(PM.lobby$standard.error[3], 4), ")") 
tab$t.3[5+12] <- round(PM.lobby$estimates[4], 3)
tab$t.3[6+12] <- paste0("(",round(PM.lobby$standard.error[4], 4), ")")

colnames(tab) <- c("Chamber", "\\textit{t+0}", "\\textit{t+1}", "\\textit{t+2}", "\\textit{t+3}")

options(knitr.kable.NA = "") 
tab %>%
  kable(format = "latex", escape = FALSE, align = "lcccc", 
        caption = "\\textbf{Coefficient Estimates from Figure 5}: This table reports the estimated coecients
and associated standard errors, computed based on a block-bootstrap procedure, which are used
to produce Figure 5. The number of observations is 3,058,224 firm-politician-year dyads. To
compute these results, we identified all dyads in which a given interest group donated to a given
politician for the first time in three years. For PAC donations, that number is 2,662. For CEO
donations, it's 792. For Employee donations, it's 934.", 
        label = "tab_j3") %>%
  pack_rows("Committee Members", 1, 6) %>%
  pack_rows("Bill Co-Sponsorship", 7, 12) %>%
  pack_rows("Bill Sponsorship", 13, 18) %>%
  add_header_above(c(" " = 1, "Estimated Effect of Donation on Probability to Lobby in:" = 4), line = FALSE) %>%
  add_header_above(c(" " = 1, "Dependent Variable:" = 4), italic = TRUE)  %>%
  footnote(general = "Std. errors in parentheses",footnote_as_chunk = TRUE) %>%
  kable_styling(full_width = TRUE) %>%
  save_kable(file.path(OUT_DIR, "table_j3.tex")) 


# clean up
rm(list = ls())
gc()